clear all
set more off

*******************************************************************************
Compile and Clean Flat Files by Year
*******************************************************************************/

local years "2007 2010 2011 2012 2013 2014"
local months "Jan Feb Mar Apr May June July Aug Sept Oct Nov Dec"

foreach y in `years' {

clear
gen init=.
save employer_firm/temp`y',replace
	
	local month=0
	foreach m in `months' {
		local month=`month'+1
		di "`month'"
		di " `m' `y'"
	
		*import relevant variables
		if "`y'"=="2014" {
		use CleanTitle City soc Employer Sector NAICS3 NAICS4 NAICS5 NAICS6 Fips State Edu MaxEdu Exp MaxExp using  Main-`y'-`m', clear
		rename *, lower
		}
		
		else {
		use cleantitle city soc employer sector naics3 naics4 naics5 naics6 fips state edu maxedu exp maxexp using  Main-`y'-`m', clear
		}
		
		gen month=`month'
		
		destring exp sector naics3 naics4 naics5 naics6 maxedu edu, replace force
		
		recode sector naics3 naics4 naics5 naics6 edu maxedu exp maxexp (-999=.)
		gen naics=naics6 
		replace naics=naics5 if naics==.
		replace naics=naics4 if naics==.
		replace naics=naics3 if naics==.
		replace naics=sector if naics==.
		
		
		drop if inlist(soc, "-999", "n/a", "", "na")
		drop if length(soc)!=7
		gen occsoc=subinstr(soc, "-", "",.)
		drop soc

		replace employer="" if employer=="na"
		replace city="" if city=="na"
		replace cleantitle="" if cleantitle=="na"
		
		foreach var of varlist employer cleantitle {
			replace `var'=subinstr(`var', "-"," ",.)
			replace `var'=subinstr(`var', "."," ",.)
			replace `var'=subinstr(`var', "_"," ",.)
			replace `var'=subinstr(`var', "&"," ",.)
			replace `var'=subinstr(`var', "/"," ",.)
			replace `var'=subinstr(`var', ","," ",.)
			replace `var'=subinstr(`var', "*"," ",.)
			replace `var'=subinstr(`var', "'"," ",.)
			replace `var'=subinstr(`var', "  "," ",.)
			replace `var'=subinstr(`var', "   "," ",.)
			replace `var'=subinstr(`var', "    "," ",.)
			replace `var'=subinstr(`var', "     "," ",.)
			replace `var'=trim(`var')
			replace `var'=lower(`var')	
		}
		
		capture drop if regexm(exp, "Metro")
		capture drop if regexm(maxedu, "Phoe")

		drop sector naics3 naics4 naics5 naics6
		
		append using employer_firm/temp`y'.dta
		
		save employer_firm/temp`y'.dta, replace
	}
	
	gen year=`y'
	
	duplicates tag month year city cleantitle employer, gen(dup_month_city)
	duplicates tag year city cleantitle employer, gen(dup_year_city)
	duplicates tag month year state cleantitle employer, gen(dup_month_st)
	duplicates tag year state cleantitle employer, gen(dup_year_st)
	
	capture drop init
	save employer_firm/temp`y'.dta, replace
	
}


use ./temp2007.dta, clear
append using ./temp2010 ./temp2011 ./temp2012 ./temp2013 ./temp2014 

egen long employerid=group(employer)
egen long jobid=group(cleantitle)

compress
rename state st
merge m:1 st using statefips.dta, keep(match) nogen keepusing(statefip)
drop st
drop if inlist(statefip, 66, 72, 60, 78)
drop if statefip==.
unique statefip
assert r(sum)==51

preserve
drop employer cleantitle
saveold ./bgt_employer_firm_07_to_14.dta, replace version(11)
restore

preserve
	keep cleantitle jobid
	duplicates drop cleantitle jobid, force
	save ./temp_jobcrosswalk.dta, replace
restore
	keep employerid employer
	duplicates drop employerid employer, force
	save ./temp_employercrosswalk.dta, replace

************************************************************************************
/*Clean and Append Data to Firm-Job dataset*/


use employer_firm/bgt_employer_firm_07_10_12.dta, clear

tab statefip
drop if inlist(statefip, 2, 15, 66, 72, 11)
unique statefip

gen educ=edu
replace educ=maxedu if educ==.

gen exper=exp
replace exper=maxexp if exp==.

drop edu maxedu exp maxexp

	gen expreq=exper!=.
	gen edureq=(educ>=12 &!missing(educ))
	gen exp2=(exper>=2 &!missing(exper))
	gen expgt2=(exper>2 &!missing(exper))
	gen exp5=(exper>=5 &!missing(exper))
	gen exp4=exper>=4 & exper!=.
	
	gen edubd=(educ==16  &!missing(educ))
	gen edugd=(educ>=18 &!missing(educ))
	gen edugths=(educ>12 &!missing(educ))
	gen edubdgd=(educ>=16 &!missing(educ))
	gen jobs=1
	
gen date=ym(year, month)	
format date %tm
gen test=dofm(date)
replace test=qofd(test)
format test %tq
rename test quarter
drop date 

merge m:1 statefip quarter using ur_state_quarter.dta, keep(master match) nogen
	
merge m:1 statefip year using bls_state_laborforce_statistics_2007_2014.dta, keep(master match) nogen
rename s_unemprate ur

egen firm_job=group(employerid jobid)
egen firm_occ=group(employerid occsoc)
egen occ_year=group(occsoc year)
egen state_year=group(statefip year)
egen firm_job_state=group(employerid jobid statefip)
egen ind_occ=group(occsoc naics)

/*Occupation Crosswalking*/
replace occsoc="435111" if occsoc=="43511."
replace occsoc="191029" if occsoc=="191020" // general code for marine biologist combine
*drop military occupations
drop if substr(occsoc,1,2)=="55"

/*Crosswalk Occupation Changes*/
*occupations codes that split 1:many from 2000 to 2010
preserve
	import excel soc2000 soc2000_name soc2010 soc2010_name using soc_2000_to_2010_crosswalk.xlsx, clear cellrange(A9:D868) allstring
	drop if soc2000==soc2010
	duplicates tag soc2000, gen(dup00)
	duplicates tag soc2010, gen(dup10)
	keep if dup10==0 & dup00!=0

	gen occsoc=soc2010
	tempfile a
	save `a', replace
restore

merge m:1 occsoc using `a', keepusing(soc2000 soc2000_name) gen(_m_soc2000)

unique occsoc //821
replace occsoc=soc2000 if _m_soc2000==3



/*VET OCCUPATION SHARES*/

merge m:1 occsoc using acs3yr_2005to2007_veteran_employment_shares.dta, gen(_m_vetshare) keep(master match) assert(2 3)

gen occsoc_broad=substr(occsoc,1,2)
destring occsoc_broad, replace

gen occsoc_broadgroup=2 if occsoc_broad>=11 & occsoc_broad<=13
replace occsoc_broadgroup=3 if occsoc_broad>=15 & occsoc_broad<=29
replace occsoc_broadgroup=4 if occsoc_broad>=31 & occsoc_broad<=39
replace occsoc_broadgroup=5 if occsoc_broad>=41 & occsoc_broad<=43
replace occsoc_broadgroup=6 if occsoc_broad>=45 & occsoc_broad<=49
replace occsoc_broadgroup=7 if occsoc_broad>=51 & occsoc_broad<=53
label variable occsoc_broadgroup "Groups of 2-digit SOC CODES- for HWOL data merge"

tostring occsoc_broad, replace

merge m:1 statefip year occsoc_broadgroup using acs_vet_supdemrate_occsocbroad_year_allmeasures.dta, keep(master match) nogen

save bgt_employer_firm_2007_2010_2012_02182016.dta, replace

drop if firm_job==.

replace ur=ur/100

			
	




